- Start by downloading the necessary packages
- We will need `gapminder`, `ggthemes`, `plotly` and `tidyverse`
- The main package we are using is `ggplot2`, but that is in the `tidyverse` suite
May 29, 2020
gapminder, ggthemes, plotly and tidyverse
# ggplot2 is the main package used here; it is part of the tidyverse suite.
# p_load() loads each listed package, installing any that are missing.
library(pacman)
p_load(gapminder, ggthemes, plotly, tidyverse)
# ggplot2
# Building a plot up one piece at a time.

# Data only: no aesthetics or layers have been supplied yet.
ggplot(gapminder)

# Map gdpPercap to the x axis and lifeExp to the y axis.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp))

# Add a point layer to draw the scatter plot.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# How lifeExp relates to gdpPercap: as lifeExp increases, gdpPercap also
# increases, holding all else constant (vice versa too).
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
# Scatter plot of life expectancy against GDP per capita, with explicit axis
# labels, a title and a data-source caption.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# Same scatter plot, using colour to bring in a third variable (continent).
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  # alpha sets the transparency of the points; size sets how large they are
  geom_point(size = 2, alpha = 0.5) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# Bubble chart restricted to 2007: colour shows the continent and the point
# size is mapped to the population.
ggplot(
  data = filter(gapminder, year == 2007),
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point(alpha = 0.5) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# One scatter panel per continent, produced with facet_grid().
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point(size = 2, alpha = 0.5) +
  # The formula `. ~ continent` facets the plot by continent
  facet_grid(. ~ continent) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# One scatter panel per continent, produced with facet_wrap().
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point(size = 2, alpha = 0.5) +
  # facet_wrap() facets like facet_grid() but arranges the panels differently
  facet_wrap(. ~ continent) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# geom_smooth evaluates the relationship between variables
# Per-continent scatter panels with a fitted straight line in each panel.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point(alpha = 0.5, size = 2) +
  facet_grid(. ~ continent) +
  # geom_smooth() overlays a fitted trend on the points:
  #   method = "lm" fits a linear model
  #   se = TRUE draws the confidence band around the fitted line
  #   lwd sets the line width
  geom_smooth(color = "black", method = "lm", se = TRUE, lwd = 0.5) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# geom_smooth evaluates the relationship between variables
# Per-continent scatter panels with a LOESS smoother in each panel.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point(alpha = 0.5, size = 2) +
  facet_grid(. ~ continent) +
  # method = "loess" swaps the linear fit for a local-regression smoother;
  # se = TRUE keeps the confidence band and lwd sets the line width
  geom_smooth(color = "black", method = "loess", se = TRUE, lwd = 0.5) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# Scatter plot coloured by continent, with GDP per capita on a log10 x axis.
gapminder %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  # Rescale the x axis to a base-10 logarithmic scale
  scale_x_log10() +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# ggthemes allow us to quickly get certain visualisations using themes
# Scatter plot coloured by continent, styled with a ready-made theme.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  # Apply the theme as one more layer of the plot
  theme_solarized() +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "Tracking the relationship between GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )
# Themes: theme_solarized(), theme_wsj(), theme_fivethirtyeight(),
# theme_economist_white()

# plotly
# Build the 2007 bubble chart as a ggplot object first, then hand it to
# ggplotly() to turn it into an interactive plotly figure.
# NOTE(review): `plot` shares its name with base::plot(); the name is kept
# because code outside this section may refer to it.
plot <- gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)) +
  # Size denotes the population size
  geom_point(alpha = 0.5) +
  labs(
    x = "GDP per capita",
    y = "Life Expectancy",
    title = "GDP per capita and Life Expectancy",
    caption = "Source: gapminder dataset"
  )

ggplotly(plot)
# plotly
# Build the chart directly with plot_ly(): marker size follows pop, colour
# follows continent, and `frame = ~year` supplies one animation frame per year.
# Hovering shows only the `text` value, which is the country.
gapminder %>%
  plot_ly(
    x = ~gdpPercap, y = ~lifeExp, size = ~pop,
    color = ~continent, frame = ~year, text = ~country,
    hoverinfo = "text", type = "scatter", mode = "markers"
  ) %>%
  # Put the x axis (GDP per capita) on a logarithmic scale
  layout(xaxis = list(type = "log"))
# Histogram of life expectancy in 2007, one observation per country.
gapminder %>%
  filter(year == 2007) %>%
  select(country, lifeExp) %>%
  ggplot(mapping = aes(x = lifeExp)) +
  geom_histogram() +
  labs(
    x = "life expectancy",
    y = "frequency",
    title = "Histogram of global life expectancy in 2007"
  )
Which geom to use depends on whether the variables are discrete or continuous
For 1 discrete and 1 continuous, use:
geom_col(), geom_boxplot()
For 2 discrete, use:
geom_count()
For 2 continuous, use:
geom_line(), geom_area(), geom_step()
Use Coronavirus dataset found here